This page examines trends in the number of COVID-19 cases in the US.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(ggplot2)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Import data

# Cumulative case counts. The first two lines of the CSV are skipped
# (presumably a title/preamble above the header row -- confirm against the file),
# and column names are normalised to snake_case by janitor::clean_names().
covid_cum <- janitor::clean_names(
  read_csv("data/covid_cumulative_cases.csv", skip = 2)
)
## Rows: 657 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): State, Date
## dbl (1): Total Cases
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Daily case counts. The first two lines of the CSV are skipped
# (presumably a title/preamble above the header row -- confirm against the file),
# and column names are normalised to snake_case by janitor::clean_names().
covid_day <- janitor::clean_names(
  read_csv("data/covid_daily_cases.csv", skip = 2)
)
## Rows: 656 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): State, Date
## dbl (3): New Cases, 7-Day Moving Avg, Historic Cases
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Daily table: one row per date with new and cumulative case counts.
# Only `date` and `total_cases` are taken from covid_cum, so the join does not
# drag in a second, suffixed copy of the other shared columns.
# `date` arrives as text and is split into month / day / year pieces
# (assumes a "Mon DD YYYY"-style value such as "Jan 23 2020" -- TODO confirm).
covid_daily <- covid_day %>%
  left_join(select(covid_cum, date, total_cases), by = "date") %>%
  select(date, new_cases, total_cases) %>%
  separate(date, into = c("month", "day", "year")) %>%
  mutate(
    month = factor(month, levels = month.abb),
    # year/day are character and month is a factor, so convert explicitly:
    # as.integer() gives the numeric year/day and the month's level index.
    date = make_date(as.integer(year), as.integer(month), as.integer(day))
  ) %>%
  # Left grouped by year and month on purpose: the monthly summary that
  # follows summarises over this grouping.
  group_by(year, month) %>%
  # A single chronological sort; sorting on the text `day` column would order
  # "10" before "2" and was superseded by this sort anyway.
  arrange(date)
  
# Total new cases per calendar month.
# The grouping is stated here rather than inherited from covid_daily, so the
# result stays correct even if covid_daily is ever ungrouped upstream.
# The result remains grouped by year (summarize drops the last grouping level).
covid_monthly <- covid_daily %>%
  group_by(year, month) %>%
  summarize(monthly = sum(new_cases))
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
# Quarterly totals of new cases.
# Months are mapped to quarters through a named lookup
# (Jan-Mar -> Q1, Apr-Jun -> Q2, Jul-Sep -> Q3, Oct-Dec -> Q4) that is spliced
# into recode(); monthly counts are then summed within each year/quarter and
# labelled "<year> - <quarter>" for plotting.
covid_seasonal <- covid_monthly %>%
  mutate(
    quarter = recode(
      month,
      !!!setNames(rep(c("Q1", "Q2", "Q3", "Q4"), each = 3), month.abb)
    )
  ) %>%
  group_by(year, quarter) %>%
  summarize(quarterly = sum(monthly)) %>%
  mutate(date = paste(year, "-", quarter))
## `summarise()` has grouped output by 'year'. You can override using the `.groups` argument.

Plots: Daily

# Daily new cases (bars, left axis) overlaid with cumulative cases (line,
# right axis).
# covid_daily is a grouped tibble (year, month) and plotly breaks line traces
# between dplyr groups, so the grouping is dropped first to keep the
# cumulative line continuous across month boundaries.
daily_fig <- plot_ly(ungroup(covid_daily))

daily_fig %>%
  add_trace(
    x = ~date, y = ~new_cases,
    type = "bar", yaxis = "y", name = "new"
  ) %>%
  add_trace(
    x = ~date, y = ~total_cases,
    type = "scatter", mode = "lines", yaxis = "y2", name = "cumulative"
  ) %>%
  layout(
    yaxis = list(title = "daily new cases", side = "left"),
    # The second axis is drawn on top of the first and placed on the right.
    yaxis2 = list(title = "cumulative cases", side = "right", overlaying = "y"),
    showlegend = TRUE
  )

Quarterly (to correspond with the consumption data)

# Bar chart of quarterly new-case totals, one bar per "<year> - <quarter>" label.
plot_ly(covid_seasonal, x = ~date, y = ~quarterly, type = "bar")